* FIX_OUTLIER_PPSR50_2002_07.SAS -- examines outliers in trailer data;
* This version is run on the PPSR>0.50 group;

%include "ASMimplibs.sas";


* options obs=5000;run;

/*----------------------------------------------------------------------
  MAKE -- helper for CHK: (re)builds the derived price, output and TFP
  measures on WORK.ALL.

  Takes no parameters and reads/writes WORK.ALL in place.  It was
  previously defined inside CHK (and therefore recompiled on every CHK
  call); it uses no CHK-local macro variables, so it is defined once here.

  Steps:
    1. lpricex = log(price), nq2 = pqs*price, rnq2 = nq2/piship.
    2. lpbar   = nq2-weighted mean of lpricex by year; pbar = exp(lpbar),
                 i.e. a weighted geometric mean of price.
    3. pbard   = pbar / (pbar in year 2002).
    4. Merge pbard back by year and compute phyq, q2, lq2, lqphy,
       ltfp, ltfpphy, ltfp2, lprice2 and counter.

  WORK.AVGPRICE and WORK.P2002 are overwritten as a side effect, and
  WORK.ALL is left sorted by year.
----------------------------------------------------------------------*/
%macro make;

    /* Log price and the weight used for the average price. */
    data all;
      set all;
      lpricex = log(price);     /* Added 11/5/2012.  */
      nq2     = pqs*price;
      rnq2    = nq2/piship;     /* Added 11/6/2012 -Kirk */
    run;

    /* Weighted mean of log price by year (weights are nq2; they used to be pqs). */
    proc summary data=all nway;
      class year;
      var lpricex;              /* recall lpricex=log(price) */
      weight nq2;               /* recall nq2=pqs*price      */
      output out=avgprice mean=lpbar;
    run;

    /* mer is a constant key used only to attach the 2002 value to every year. */
    data avgprice;
      set avgprice;
      pbar = exp(lpbar);
      mer  = 1;
    run;

    data p2002 (keep=norm mer);
      set avgprice;
      if year=2002;
      norm = pbar;
      mer  = 1;
    run;

    /* Normalise the yearly average price by its 2002 level. */
    data avgprice (keep=year pbard);
      merge avgprice p2002;
      by mer;
      pbard = pbar/norm;
    run;

    proc sort data=all;      by year; run;
    proc sort data=avgprice; by year; run;

    data all;
      merge all avgprice;
      by year;
      phyq = pqs/ppsr1;                               /** Added 11/5/2012 -Kirk **/
      q2   = nq2/pbard;                               /* nq1=price*phyq */
      /* q3=nq/pbard;  (nq = tvs+dinv) -- disabled */
      if q2   > 0 then lq2   = log(q2);
      /* if q3 > 0 then lq3 = log(q3); -- disabled */
      if phyq > 0 then lqphy = log(phyq);             /** Added 11/5/2012 -Kirk **/
      ltfp    = lnq   - (iake+iaks)*lk - ial*lth - iam*lm - iae*le; /** Added 11/5/2012 -Kirk **/
      ltfpphy = lqphy - (iake+iaks)*lk - ial*lth - iam*lm - iae*le; /** Added 11/5/2012 -Kirk **/
      ltfp2   = lq2   - (iake+iaks)*lk - ial*lth - iam*lm - iae*le; /** Added 11/5/2012 -Kirk **/
      if price>0 and pbard>0 then lprice2 = log(price/pbard);       /* relative price */
      counter = 1;
    run;

%mend make;


/*----------------------------------------------------------------------
  CHK -- outlier corrections and dispersion statistics for one product
  dataset, restricted to observations with ppsr1 > 0.50.

  Parameters:
    d = dataset name (based on product/industry); input is fhs.phy<d>
    i = predominant industry of product (not referenced by any active
        statement; it appears only in a commented-out DATA statement)
    s = last period of good data; rows with year <= s are kept

  Outputs:
    ppsr50.all<d>                  cleaned plant-level data
    WORK.<d>_dispersion            dispersion statistics by year, all plants
    WORK.<d>_dispersion_nonimp     price dispersion, pqs_imp ne 1
    WORK.<d>_dispersion_noimp      TFP dispersion, no imputed items
  plus printed diagnostics (UNIVARIATE, MEANS, FREQ, CORR, REG).

  Many WORK datasets (all, allm, avgphy, mrat, pmed, trim, check*, ...)
  are overwritten on every call.
----------------------------------------------------------------------*/
%macro chk(d,i,s);

      /* Treat a missing physical-data flag as 0. */
      data phy&d;
        set fhs.phy&d;
        if phyflag=. then phyflag=0;
      run;

      proc sort data=phy&d; by survu_id; run;

      /* Mean of phyflag per survu_id. */
      proc summary data=phy&d nway;
        class survu_id;
        var phyflag;
        output out=avgphy mean=aphyflag;
      run;

      title "Outlier corrections for &d";

      /* data db.phy<i>; -- earlier output target, disabled */
      data phy&d;
        merge phy&d avgphy;
        by survu_id;
      run;

      data all;
        set phy&d;
        /** if aphyflag>0; Keep plants that have no physical data. 11/6/12 **/
        if ppsr1>0.50;
        mer=1;
        if year<=&s;            /* changed from s< to s<= */
      run;

      /* 0) CREATE OTHER MEASURES OF PRODUCTIVITY */
      %make;

      title "regression vars for &d before corrections";
      proc univariate data=all; var lprice2 ltfp ltfp2 /* ltfp3 */ ltfpphy; run;

      title "counter before corrections for &d";
      proc means data=all; var counter; run;


  /* 1) IMPUTE DELETIONS (Tim Dunne style) */
      data all;
        set all;
        if sw>0 then rat1=cp/sw;
        if sw>0 then rat2=tvs/sw;
        if sw>0 then rat3=phyq/sw;
      run;

      /* With the deletions commented out, allm is a plain copy of all. */
      data allm;
        set all;
/** Comment out IMPUTE DELETIONS
        if rat1=. then delete;
        if rat1=0 then delete;
        if rat2=. then delete;
        if rat2=0 then delete;
        if rat3=. then delete;
        if rat3=0 then delete;
***/
      run;

      /* Modal value of each ratio by year. */
      proc univariate data=allm noprint;
        by year;
        var rat1 rat2 rat3;
        output out=mrat mode=mrat1 mrat2 mrat3;
      run;

      data all;
        merge all mrat;
        by year;
/** Comment out deletions based on modal ratios for CART-completed data
         if rat1=mrat1 and mrat1^=. and rat2=mrat2 and mrat2^=. then delete;
         if rat3=mrat3 and mrat3^=. then delete;
***/
      run;

      title "counter after impute correction";
      proc means data=all; var counter; run;

  /* 2) MISSING VALUE DELETIONS */

      /* For the listed datasets in 2007 only ltfp is required; otherwise
         every regression variable must be non-missing. */
      data all;
        set all;
        if "&d" in ("boxesf","breadf", "carbonf", "coffeef", "floorf","plywoodf") and year=2007
        then do;
          if ( ltfp=. ) then delete;
        end;
        else do;
          if ( ltfp=. or ltfp2=. /* or ltfp3=.*/ or lprice2=. or ltfpphy=. )
          then delete;
        end;
      run;

  /* 2) PRICE OUTLIER CORRECTIONS */

      proc sort data=all; by year; run;

      proc univariate data=all noprint;
        by year;
        var price;
        output out=pmed median=pmed;
      run;

      data all;
        merge all pmed;
        by year;
      run;

      /* Changed this so that we don't delete plants in 2007
         in industries that don't have quantity data (and
         thus don't have price data). */
      /* FIX: the band test is now parenthesised.  AND binds tighter than
         OR, so the unparenthesised form applied the upper bound
         (price > 10*pmed) to every industry-year and only the lower bound
         was restricted to gasf/icef or 2002. */
      data all;
        set all;
        if ("&d" in ("gasf", "icef") or year=2002)
           and (price < .1*pmed or price > 10*pmed) then delete;
        /** if .1*pmed<=price<=10*pmed; **/
      run;

      /* proc sort data=all; by priceoutlier; run; */
      title "counter after price correction for &d";
      proc means data=all; var counter; by year; run;

  /* 3) INPUT OUTLIER CORRECTIONS */

      data all;
        set all;
        /* mattvs=mat/tvs;               Commented out by Kirk, 11/5/2012 */
        mattvs=cm/tvs;                /* Added by Kirk, 11/5/2012 */
        /* mattvs = sum(sum(cp,cw),cr)/tvs;   alternative, disabled */
        /* mat2tvs=(sum(cp,cw))/tvs;     Commented out by Kirk, 11/5/2012 */
        mat2tvs=(sum(cm))/tvs;        /* Added by Kirk, 11/5/2012 */
                                      /* Modified to use CM, 11/14/2012 */
        swtvs=sw/tvs;
        iamrule=iam/10;               /* the rule is they can be 1/10 of industry */
        ialrule=ial/10;               /* the rule is they can be 1/10 of industry */
      run;

      /* Checking the impact of the rules */
      proc means data=all; var mattvs swtvs iam ial; run;

      /* Flag = 0 when the observation passes the rule, 1 when it fails. */
      data checkall;                  /* both bounds */
        set all;
        if (iamrule < mattvs and mat2tvs <1) then mattvsf=0; else mattvsf=1;
        if ialrule < swtvs <1 then swtvsf=0; else swtvsf=1;
      run;
      title "all";
      proc freq data=checkall; table mattvsf swtvsf; run;

      data checktop;                  /* upper bound only */
        set all;
        if mat2tvs <1 then mattvsf=0; else mattvsf=1;
        if swtvs <1 then swtvsf=0; else swtvsf=1;
      run;
      title "top";
      proc freq data=checktop; table mattvsf swtvsf; run;

      data checkbot;                  /* lower bound only */
        set all;
        if iamrule < mattvs then mattvsf=0; else mattvsf=1;
        if ialrule < swtvs then swtvsf=0; else swtvsf=1;
      run;
      title "bot";
      proc freq data=checkbot; table mattvsf swtvsf; run;

      /* Applying the rules */
      data all;
        set all;
        if (iamrule < mattvs and mat2tvs <1);
        if ialrule < swtvs <1;
      run;

      /* I added this sort so that I could see the counts by year.--Kirk */
      proc sort data=all; by year; run;

      title "counter after input correction";
      proc means data=all; var counter; by year; run;

  /* 4) TRIMMING LTFPPHY TAILS */

      /* 1st and 99th percentiles of ltfpphy by year (ltfpp1, ltfpp99). */
      proc univariate data=all;
        by year;
        var ltfpphy;
        output out=trim pctlpre = ltfpp pctlpts = 1 99;
      run;

      data all;
        merge all trim;
        by year;
        /** Only trim physical TFP outlier for industry-years
            in which we have physical TFP measures. **/
        if ("&d" in ("gasf", "icef") or year=2002)
        then do;
          if ltfpp1<=ltfpphy<=ltfpp99
          then ltfpphy=ltfpphy;
          else delete;
        end;
        else ltfpphy=.;
        /** Flag outliers instead of dropping them?
           if ltfpphy<ltfpp1 or ltfpphy>ltfpp99 then ltfpphy_outlier=1;
           else ltfpphy_outlier=0;
         **/
      run;

      proc univariate data=all;
        by year;
        var ltfpphy;
      run;

      /** proc sort data=all; by ltfpphy_outlier; run; **/

      title "counter for &d after trimming ltfpphy 1% tails";
      proc means data=all; var counter; by year; run;

  /* 5) NOW HAVE TO REDO PRICE2 */

      /* NOTE(review): make recomputes ltfpphy for every row, so the
         "else ltfpphy=." blanking applied in step 4 is overwritten here
         wherever lqphy and the inputs are non-missing -- confirm that
         this is intended. */
      data all;
        set all (drop=pbard q2 /* q3 */ lq2 /* lq3 */ ltfp2 /* ltfp3 */ lprice2);
      run;
      %make;

  /* 6) AND THE FINAL DATA IS */

      title "regression vars AFTER corrections for &d";
      proc univariate data=all; var lprice2 ltfp ltfp2 /* ltfp3 */ ltfpphy; run;

      title "counter after everything for &d";
      proc means data=all; var counter; run;

      data ppsr50.all&d;
/*     data ppsr50.&d; */
        set all;
      run;

      proc contents data=ppsr50.all&d; run;

      title "Correlations for &d reported in Table";
      proc corr data=all;
        var lnq lq2 lqphy ltfp ltfp2 /* ltfp3 */ ltfpphy lprice2;
      run;

      /* Diagnostics split by the quantity-imputation flag pqs_imp. */
      proc means data=all; var pqs_imp; title1 "pqs_imp distribution "; run;

      proc corr data=all; var lqphy ltfpphy lprice2; where pqs_imp ne 1; title1 "Correlations when pqs_imp=0"; run;

      proc corr data=all; var lqphy ltfpphy lprice2; where pqs_imp=1; title1 "Correlations when pqs_imp=1"; run;

      /* From here on a missing pqs_imp is treated as 0. */
      data all; set all; if pqs_imp=. then pqs_imp=0; run;

      /* NOTE(review): lprice is not created anywhere in this macro (only
         lpricex and lprice2 are); presumably it is supplied by the input
         dataset -- confirm. */
      proc reg data=all; model lqphy=lprice; where year=2002; run;
      proc reg data=all; model lqphy=lprice; where year=2002 and pqs_imp=0; run;
      proc reg data=all; model lqphy=lprice; where year=2002 and pqs_imp=1; run;
      proc reg data=all; model ltfpphy=lprice; where year=2002; run;
      proc reg data=all; model ltfpphy=lprice; where year=2002 and pqs_imp=1; run;
      proc reg data=all; model ltfpphy=lprice; where year=2002 and pqs_imp=0; run;
      proc reg data=all; model lprice=pqs_imp; where year=2002; run;
      proc reg data=all; model lqphy=pqs_imp; where year=2002; run;
      proc reg data=all; model exit02=pqs_imp; where year=2002; run;
      proc reg data=all; model lk=pqs_imp; where year=2002; run;

      proc reg data=all; model exit02=ltfpphy lprice2 lk pqs_imp; where year=2002; run;
      /* FIX: MODEL was misspelled ("mdoel"), which made this step fail. */
      proc reg data=all; model exit02=ltfpphy lprice2 lk; where year=2002; run;

      proc sort data=all; by year; run;

      /* Dispersion statistics by year, all plants. */
      proc means data=all N p10 q1 q3 p90 stddev NOPRINT;
        var lnq lq2 lqphy ltfp ltfp2 /* ltfp3 */ ltfpphy lprice2;
        by year /* _IMPUTATION_ */;
        output out=&d._dispersion (keep = year Nq Ntfp Ntfp2 Ntfpphy Nprice2 lq_p10 lq_q1 lq_q3 lq_p90 lq_sd
                                               lq2_p10 lq2_q1 lq2_q3 lq2_p90 lq2_sd
                                               lqphy_p10 lqphy_q1 lqphy_q3 lqphy_p90 lqphy_sd
                                               ltfp_p10 ltfp_q1 ltfp_q3 ltfp_p90 ltfp_sd
                                               ltfp2_p10 ltfp2_q1 ltfp2_q3 ltfp2_p90 ltfp2_sd
                                               ltfpphy_p10 ltfpphy_q1 ltfpphy_q3 ltfpphy_p90 ltfpphy_sd
                                               lprice2_p10 lprice2_q1 lprice2_q3 lprice2_p90 lprice2_sd)
        N(lnq)=Nq N(ltfp)=Ntfp N(ltfp2)=Ntfp2 N(ltfpphy)=Ntfpphy N(lprice2)=Nprice2 p10(lnq)=lq_p10 q1(lnq)=lq_q1 q3(lnq)=lq_q3 p90(lnq)=lq_p90 stddev(lnq)=lq_sd
        p10(lq2)=lq2_p10 q1(lq2)=lq2_q1 q3(lq2)=lq2_q3 p90(lq2)=lq2_p90 stddev(lq2)=lq2_sd
        p10(lqphy)=lqphy_p10 q1(lqphy)=lqphy_q1 q3(lqphy)=lqphy_q3 p90(lqphy)=lqphy_p90 stddev(lqphy)=lqphy_sd
        p10(ltfp)=ltfp_p10 q1(ltfp)=ltfp_q1 q3(ltfp)=ltfp_q3 p90(ltfp)=ltfp_p90 stddev(ltfp)=ltfp_sd
        p10(ltfp2)=ltfp2_p10 q1(ltfp2)=ltfp2_q1 q3(ltfp2)=ltfp2_q3 p90(ltfp2)=ltfp2_p90 stddev(ltfp2)=ltfp2_sd
        p10(ltfpphy)=ltfpphy_p10 q1(ltfpphy)=ltfpphy_q1 q3(ltfpphy)=ltfpphy_q3 p90(ltfpphy)=ltfpphy_p90 stddev(ltfpphy)=ltfpphy_sd
        p10(lprice2)=lprice2_p10 q1(lprice2)=lprice2_q1 q3(lprice2)=lprice2_q3 p90(lprice2)=lprice2_p90 stddev(lprice2)=lprice2_sd;
      run;

      /* Inter-quantile ranges in logs and the corresponding level ratios. */
      data &d._dispersion;
        set &d._dispersion;
        lq_iqr = lq_q3 - lq_q1;
        lq_90_10 = lq_p90 - lq_p10;
        q_75_25_ratio = exp(lq_iqr);
        q_90_10_ratio = exp(lq_90_10);

        lq2_iqr = lq2_q3 - lq2_q1;
        lq2_90_10 = lq2_p90 - lq2_p10;
        q2_75_25_ratio = exp(lq2_iqr);
        q2_90_10_ratio = exp(lq2_90_10);

        lqphy_iqr = lqphy_q3 - lqphy_q1;
        lqphy_90_10 = lqphy_p90 - lqphy_p10;
        qphy_75_25_ratio = exp(lqphy_iqr);
        qphy_90_10_ratio = exp(lqphy_90_10);

        ltfp_iqr = ltfp_q3 - ltfp_q1;
        ltfp_90_10 = ltfp_p90 - ltfp_p10;
        tfp_75_25_ratio = exp(ltfp_iqr);
        tfp_90_10_ratio = exp(ltfp_90_10);

        ltfp2_iqr = ltfp2_q3 - ltfp2_q1;
        ltfp2_90_10 = ltfp2_p90 - ltfp2_p10;
        tfp2_75_25_ratio = exp(ltfp2_iqr);
        tfp2_90_10_ratio = exp(ltfp2_90_10);

        ltfpphy_iqr = ltfpphy_q3 - ltfpphy_q1;
        ltfpphy_90_10 = ltfpphy_p90 - ltfpphy_p10;
        tfpphy_75_25_ratio = exp(ltfpphy_iqr);
        tfpphy_90_10_ratio = exp(ltfpphy_90_10);

        lprice2_iqr = lprice2_q3 - lprice2_q1;
        lprice2_90_10 = lprice2_p90 - lprice2_p10;
        price2_75_25_ratio = exp(lprice2_iqr);
        price2_90_10_ratio = exp(lprice2_90_10);
      run;

      /* Price dispersion for plants whose quantity is not imputed. */
      proc means data=all N p10 q1 q3 p90 stddev NOPRINT;
        var lprice2;
        by year;
        where pqs_imp ne 1;
        output out=&d._dispersion_nonimp (keep = year Nprice2_nonimp lprice2_p10 lprice2_q1 lprice2_q3 lprice2_p90 lprice2_sd)
        N(lprice2)=Nprice2_nonimp
        p10(lprice2)=lprice2_p10 q1(lprice2)=lprice2_q1 q3(lprice2)=lprice2_q3 p90(lprice2)=lprice2_p90 stddev(lprice2)=lprice2_sd;
      run;

      data &d._dispersion_nonimp;
        set &d._dispersion_nonimp;
        lprice2_iqr = lprice2_q3 - lprice2_q1;
        lprice2_90_10 = lprice2_p90 - lprice2_p10;
        price2_75_25_ratio_nonimp = exp(lprice2_iqr);
        price2_90_10_ratio_nonimp = exp(lprice2_90_10);
      run;

      /* TFP dispersion for plants with none of the listed items imputed. */
      /* FIX: the VAR statement listed lprice2, but the OUTPUT statement
         requests statistics for ltfp and ltfpphy; variables named in
         OUTPUT must be analysis variables. */
      proc means data=all N p10 q1 q3 p90 stddev NOPRINT;
        var ltfp ltfpphy;
        by year;
        where pqs_imp ne 1 and tvs_imp=0 and cm_imp=0 and ee_imp=0 and cf_imp=0 and ph_imp=0 and tab_imp=0 and tae_imp=0 and pw_imp=0 and ww_imp=0 and te_imp=0;
        output out=&d._dispersion_noimp (keep = year Ntfp Ntfpphy_noimp
                                               ltfp_p10 ltfp_q1 ltfp_q3 ltfp_p90
                                               ltfpphy_p10 ltfpphy_q1 ltfpphy_q3 ltfpphy_p90 )
        N(ltfp)=Ntfp N(ltfpphy)=Ntfpphy_noimp
        p10(ltfp)=ltfp_p10 q1(ltfp)=ltfp_q1 q3(ltfp)=ltfp_q3 p90(ltfp)=ltfp_p90
        p10(ltfpphy)=ltfpphy_p10 q1(ltfpphy)=ltfpphy_q1 q3(ltfpphy)=ltfpphy_q3 p90(ltfpphy)=ltfpphy_p90
        ;
      run;

      data &d._dispersion_noimp;
        set &d._dispersion_noimp;
        ltfp_iqr = ltfp_q3 - ltfp_q1;
        ltfp_90_10 = ltfp_p90 - ltfp_p10;
        tfp_75_25_ratio_noimp = exp(ltfp_iqr);
        tfp_90_10_ratio_noimp = exp(ltfp_90_10);

        ltfpphy_iqr = ltfpphy_q3 - ltfpphy_q1;
        ltfpphy_90_10 = ltfpphy_p90 - ltfpphy_p10;
        tfpphy_75_25_ratio_noimp = exp(ltfpphy_iqr);
        tfpphy_90_10_ratio_noimp = exp(ltfpphy_90_10);
      run;

%mend chk;

/** Keep data for 2002 and 2007, even though several industries
   don't have physical data in one or both years.
***/
 
/* Run the outlier-correction macro once per product dataset.
   Arguments: dataset name, industry code, last year kept. */
%chk(gasf,32411000,2007);
%chk(sugarf,31131200,2007);
%chk(icef,31211300,2007);
%chk(breadf,31181200,2007);
%chk(coffeef,31192000,2007);
%chk(plywoodf,32121100,2007);
%chk(floorf,321918,2007);
%chk(carbonf,325182,2007);
%chk(boxesf,322211,2007);
/* concf is disabled.  This must be a block comment: an asterisk-style
   comment does not hide a macro call from the macro processor, so the
   previous form (asterisk followed by the call) still invoked the macro. */
/* %chk(concf,32732000,2007); */

/* Stack the industry TFP dispersion estimates, 
  and display them together. */

/* Tag every per-industry dispersion table with a 7-character industry
   label (shorter names are blank-padded so all labels share one length). */
data gasf_dispersion;
  set gasf_dispersion;
  industry = "gas    ";
run;

data sugarf_dispersion;
  set sugarf_dispersion;
  industry = "sugar  ";
run;

data icef_dispersion;
  set icef_dispersion;
  industry = "ice    ";
run;

data breadf_dispersion;
  set breadf_dispersion;
  industry = "bread  ";
run;

data coffeef_dispersion;
  set coffeef_dispersion;
  industry = "coffee ";
run;

data plywoodf_dispersion;
  set plywoodf_dispersion;
  industry = "plywood";
run;

data floorf_dispersion;
  set floorf_dispersion;
  industry = "floor  ";
run;

data carbonf_dispersion;
  set carbonf_dispersion;
  industry = "carbon ";
run;

data boxesf_dispersion;
  set boxesf_dispersion;
  industry = "boxes  ";
run;

/* Stack the nine tables; the same rows are written to the WORK copy and
   to the permanent copy in ppsr50 in a single pass. */
data all_dispersion
     ppsr50.all_dispersion;
  set plywoodf_dispersion
      gasf_dispersion
      sugarf_dispersion
      icef_dispersion
      breadf_dispersion
      coffeef_dispersion
      floorf_dispersion
      carbonf_dispersion
      boxesf_dispersion;
run;

/* Order the stacked table, then print one report per measure. */
proc sort data=all_dispersion;
  by industry year;
run;

title1 'Real TVS Dispersion measures by industry and year';
title2 'Bureau-completed data';
proc print data=all_dispersion;
  var industry year Nq lq_sd q_75_25_ratio q_90_10_ratio;
run;

title1 'Revenue Output Dispersion measures by industry and year';
title2 'Bureau-completed data';
proc print data=all_dispersion;
  var industry year lq2_sd q2_75_25_ratio q2_90_10_ratio;
run;

title1 'Physical Output Dispersion measures by industry and year';
title2 'Bureau-completed data';
proc print data=all_dispersion;
  var industry year lqphy_sd qphy_75_25_ratio qphy_90_10_ratio;
run;

title1 'Traditional TFP Dispersion measures by industry and year';
title2 'Bureau-completed data';
proc print data=all_dispersion;
  var industry year Ntfp ltfp_sd tfp_75_25_ratio tfp_90_10_ratio;
run;

title1 'Revenue TFP Dispersion measures by industry and year';
title2 'Bureau-completed data';
proc print data=all_dispersion;
  var industry year Ntfp2 ltfp2_sd tfp2_75_25_ratio tfp2_90_10_ratio;
run;

title1 'Physical TFP Dispersion measures by industry and year';
title2 'Bureau-completed data';
proc print data=all_dispersion;
  var industry year Ntfpphy ltfpphy_sd tfpphy_75_25_ratio tfpphy_90_10_ratio;
run;

title1 'Price Dispersion measures by industry and year';
title2 'Bureau-completed data';
proc print data=all_dispersion;
  var industry year Nprice2 lprice2_sd price2_75_25_ratio price2_90_10_ratio;
run;

/* Same labelling and stacking as for the full tables, applied to the
   price-dispersion tables built from rows with pqs_imp ne 1. */
data gasf_dispersion_nonimp;
  set gasf_dispersion_nonimp;
  industry = "gas    ";
run;

data sugarf_dispersion_nonimp;
  set sugarf_dispersion_nonimp;
  industry = "sugar  ";
run;

data icef_dispersion_nonimp;
  set icef_dispersion_nonimp;
  industry = "ice    ";
run;

data breadf_dispersion_nonimp;
  set breadf_dispersion_nonimp;
  industry = "bread  ";
run;

data coffeef_dispersion_nonimp;
  set coffeef_dispersion_nonimp;
  industry = "coffee ";
run;

data plywoodf_dispersion_nonimp;
  set plywoodf_dispersion_nonimp;
  industry = "plywood";
run;

data floorf_dispersion_nonimp;
  set floorf_dispersion_nonimp;
  industry = "floor  ";
run;

data carbonf_dispersion_nonimp;
  set carbonf_dispersion_nonimp;
  industry = "carbon ";
run;

data boxesf_dispersion_nonimp;
  set boxesf_dispersion_nonimp;
  industry = "boxes  ";
run;

data all_dispersion_nonimp;
  set plywoodf_dispersion_nonimp
      gasf_dispersion_nonimp
      sugarf_dispersion_nonimp
      icef_dispersion_nonimp
      breadf_dispersion_nonimp
      coffeef_dispersion_nonimp
      floorf_dispersion_nonimp
      carbonf_dispersion_nonimp
      boxesf_dispersion_nonimp;
run;

proc sort data=all_dispersion_nonimp;
  by industry year;
run;

title1 'Price Dispersion measures by industry and year';
title2 'Plants with non-imputed quantity data';
proc print data=all_dispersion_nonimp;
  var industry year Nprice2_nonimp lprice2_sd price2_75_25_ratio_nonimp price2_90_10_ratio_nonimp;
run;

/* Price-dispersion columns from the all-plants table (saved to ppsr50). */
data ppsr50.price_dispersion_all;
  set all_dispersion;
  keep industry year Nprice2 price2_75_25_ratio price2_90_10_ratio;
run;

/* Matching columns from the non-imputed table. */
data price_dispersion_nonimp;
  set all_dispersion_nonimp;
  keep industry year Nprice2_nonimp price2_75_25_ratio_nonimp price2_90_10_ratio_nonimp;
run;

/* Non-imputed over all-plants ratios per industry-year; the 2007 rows
   for boxes and sugar are dropped. */
data price_dispersion_ratios;
  merge price_dispersion_nonimp
        ppsr50.price_dispersion_all;
  by industry year;
  Nprice_impall_r    = Nprice2_nonimp / Nprice2;
  price7525_impall_r = price2_75_25_ratio_nonimp / price2_75_25_ratio;
  price9010_impall_r = price2_90_10_ratio_nonimp / price2_90_10_ratio;
  if year=2007 and industry in ("boxes  ","sugar  ") then delete;
run;

title1 "Ratios of industry-year sample size and price dispersion";
title2 "Non-imputed data versus Bureau-completed data";
proc means data=price_dispersion_ratios N mean stddev;
  var Nprice_impall_r price7525_impall_r price9010_impall_r;
run;




/* Label and stack the TFP dispersion tables built from plants that pass
   the no-imputation WHERE filter, then print them. */
data gasf_dispersion_noimp;
  set gasf_dispersion_noimp;
  industry = "gas    ";
run;

data sugarf_dispersion_noimp;
  set sugarf_dispersion_noimp;
  industry = "sugar  ";
run;

data icef_dispersion_noimp;
  set icef_dispersion_noimp;
  industry = "ice    ";
run;

data breadf_dispersion_noimp;
  set breadf_dispersion_noimp;
  industry = "bread  ";
run;

data coffeef_dispersion_noimp;
  set coffeef_dispersion_noimp;
  industry = "coffee ";
run;

data plywoodf_dispersion_noimp;
  set plywoodf_dispersion_noimp;
  industry = "plywood";
run;

data floorf_dispersion_noimp;
  set floorf_dispersion_noimp;
  industry = "floor  ";
run;

data carbonf_dispersion_noimp;
  set carbonf_dispersion_noimp;
  industry = "carbon ";
run;

data boxesf_dispersion_noimp;
  set boxesf_dispersion_noimp;
  industry = "boxes  ";
run;

data all_dispersion_noimp;
  set plywoodf_dispersion_noimp
      gasf_dispersion_noimp
      sugarf_dispersion_noimp
      icef_dispersion_noimp
      breadf_dispersion_noimp
      coffeef_dispersion_noimp
      floorf_dispersion_noimp
      carbonf_dispersion_noimp
      boxesf_dispersion_noimp;
run;

proc sort data=all_dispersion_noimp;
  by industry year;
run;

title1 "Traditional TFP and Physical TFP measures by industry and year";
title2 "Complete-cases plants";
proc print data=all_dispersion_noimp;
  var industry year Ntfpphy_noimp  tfp_75_25_ratio_noimp tfp_90_10_ratio_noimp tfpphy_75_25_ratio_noimp tfpphy_90_10_ratio_noimp;
run;


/* Keep only the complete-case columns so the merge below does not
   overwrite the same-named all-plants statistics. */
data tfp_dispersion_noimp;
  set all_dispersion_noimp;
  keep industry year Ntfpphy_noimp tfp_75_25_ratio_noimp tfp_90_10_ratio_noimp tfpphy_75_25_ratio_noimp tfpphy_90_10_ratio_noimp;
run;

/* Complete-case over all-plants ratios per industry-year; the 2007 rows
   for boxes and sugar are dropped. */
data tfp_dispersion_ratios;
  merge all_dispersion
        tfp_dispersion_noimp;
  by industry year;
  Ntfpphy_noimpall_r    = Ntfpphy_noimp / Ntfpphy;
  tfp7525_noimpall_r    = tfp_75_25_ratio_noimp / tfp_75_25_ratio;
  tfp9010_noimpall_r    = tfp_90_10_ratio_noimp / tfp_90_10_ratio;
  tfpphy7525_noimpall_r = tfpphy_75_25_ratio_noimp / tfpphy_75_25_ratio;
  tfpphy9010_noimpall_r = tfpphy_90_10_ratio_noimp / tfpphy_90_10_ratio;
  if year=2007 and industry in ("boxes  ","sugar  ") then delete;
run;

title1 "Ratios of industry-year sample size and TFP dispersion";
title2 "Complete-cases data over Bureau-completed data";
proc means data=tfp_dispersion_ratios N mean stddev;
  var Ntfpphy_noimpall_r tfp7525_noimpall_r tfp9010_noimpall_r tfpphy7525_noimpall_r tfpphy9010_noimpall_r;
run;



